library(Zelig)
library(arm)
library(DataCombine)
library(foreign)
library(reshape2)
library(data.table)


# DATA

# old method of creating censored data:
# beckerNoLag <- read.csv("~/Documents/PMCs_Final.csv")
# becker <- slide(data = beckerNoLag, 
#	Var = "PMC_onset", 
#	slideBy = -1, 
#	GroupVar = "country",
#	NewVar = "PMC_onset_lag")
#becker$PMC_onset_censored <- becker$PMC_onset;
#becker$PMC_onset_censored[becker$PMC_onset_lag==1 & becker$PMC_onset==0] <- NA

# Import Africa Data
africa <- read.csv("~/Documents/PMC_Data/Africa.csv")

# Import PMC contract data
onsets <- read.csv("~/Documents/PMC_Data/Onsets.csv")
# Reshape data to prepare for merge with Africa dataset
pmc <- data.frame(country=NA, year=NA, pmcOnset=NA, pmcUse=NA, spearTip=NA)
for (i in 1:nrow(onsets)) {
	newOnset <- c(as.character(onsets[i,2]),onsets[i,3],1,1,onsets[i,5])
	pmc <- rbind(pmc, newOnset)
	if (onsets[i,4] > onsets[i,3]) {
		for (j in as.integer(onsets[i,3]+1):onsets[i,4]) {
			newUse <- c(as.character(onsets[i,2]),j,0,1,onsets[i,5])
			pmc <- rbind(pmc, newUse)
		}
	}
}
pmc <- pmc[-1,]
# Create two versions of pmc data, in case we want to use an alternate
# approach to censoring (discussed below).
pmcAlternateDef <- pmc
# First approach to censoring is to censor onset during use-years,
# with onset-years that occur during use dropped, while
# keeping use-years that extend beyond those of the first onset kept.
# Start by identifying rows to drop
rowsToDrop <- c()
for (i in 1:as.integer(nrow(pmc)-1)) {
	for (j in as.integer(i+1):nrow(pmc)) {
		if (pmc[j,1]==pmc[i,1] && pmc[j,2]<=pmc[i,2]) {
			rowsToDrop <- c(rowsToDrop,j)
		}
	}
}
rowsToDrop <- unique(rowsToDrop)
# Then make sure we don't lose data on spearTip from dropped rows.
# (When onsets to be dropped due to overlap include combat services, 
# mark corresponding [non-dropped] use-years as including combat services.)
for (i in 1:length(rowsToDrop)) {
	if (pmc[rowsToDrop[i],5]==1) {
		for (j in 1:nrow(pmc)) {
			if (pmc[j,1]==pmc[rowsToDrop[i],1] && pmc[j,2]==pmc[rowsToDrop[i],2]) {
				pmc[j,5] <- 1
			}
		}
	}
}
# Then drop cases.
pmc <- pmc[-rowsToDrop,]
# (Alternate definition of onset is to censored during use, 
# with years of use that occur on or after the year of 
# subsequent onsets being dropped. I don't prefer this 
# because it undermines the logic of censoring the onset 
# variable for use-years.)

# Merge data with Africa dataset
pmc$pmcOnset[pmc$pmcOnset==0] <- -99
becker <- merge(africa, pmc, all.x=T, all.y=T)
becker$pmcUse  [is.na(becker$pmcUse  )] <- 0
becker$pmcOnset[is.na(becker$pmcOnset)] <- 0
becker$pmcOnset[becker$pmcOnset==-99] <- NA
becker$pmcUse[becker$country=="Eritrea" & becker$year<1993] <- NA

becker <- transform(becker, pmcOnset = as.numeric(pmcOnset),
				  pmcUse   = as.numeric(pmcUse),
				  spearTip = as.numeric(spearTip)
)

# Add FDI data
# download data from http://data.worldbank.org/indicator/BX.KLT.DINV.WD.GD.ZS
# remove first four rows so the top row is headers, save as FDIdata.csv
fdi <- read.csv("~/FDIdata.csv")
# delete unneeded columns and rename remaining ones
setDT(fdi)[,( 2:33):=NULL]
setDT(fdi)[,(22:27):=NULL]
setDF(fdi)
colnames(fdi) <- c("country",1990:2009)
# reshape FDI data and merge with becker
fdiMelt <- melt(fdi, id=c("country"))
colnames(fdiMelt) <- c("country","year","FDIbyGDP")
beckerPlusFDI <- merge(becker, fdiMelt)



# MODELS

# Clustered Robust Standard Errors don't work with relogit in R.
# Zelig dropped this functionality without notification.
# (Running relogit with CRSE doesn't even produce an error message!)
# Therefore I ran my models in Stata.
# A do-file is available on my dataverse with code for running all models.

# Export to Stata:
write.dta(becker, "becker.dta")
write.dta(beckerPlusFDI, "beckerPlusFDI.dta")


# FIGURES

# Boxplot showing regime duration based on cox, north, weingast
regimeDuration <- data.frame(
	R1=c(1,2,7,17,34),
	R2=c(2,4,12.5,45.5,71),
	R3=c(10,34,60,88,131)
)
bxp.data=list(stats=data.matrix(regimeDuration),n=rep(1,ncol(regimeDuration)))
bxp(bxp.data)

# Table and Euler Diagram of IV joint distribution
becker$ANRdummy      <- becker$Oil + becker$Diamonds
                     becker$ANRdummy[becker$ANRdummy==2] <- 1
becker$coupOrAttempt <- becker$LagCoup + becker$LagCoupAttempt
                     becker$coupOrAttempt[becker$coupOrAttempt==2] <- 1
becker$jointDis <- becker$coupOrAttempt + 2*becker$ANRdummy + 4*becker$Civil_War
table(becker$jointDis) #use this for venn diagram by regime-year

# If you wish to see a table placing countries (instead of country years) 
# into each category based on which variables they *ever* experienced a 1 in,
# use this code: 
countries <- data.frame(country=c(), coupYears=c(), anrYears=c(), warYears=c())
for (i in 1:43) {
	countries[i,1] <- as.character(
					      becker$country[becker$countrycode==i & becker$year==2009])
	countries[i,2] <- sum(becker$coupOrAttempt[becker$countrycode==i], na.rm=T)
	countries[i,3] <- sum(becker$ANRdummy     [becker$countrycode==i], na.rm=T)
	countries[i,4] <- sum(becker$Civil_War    [becker$countrycode==i], na.rm=T)
}
countries$coupEver <- countries$coupYears
countries$anrEver  <- countries$anrYears
countries$warEver  <- countries$warYears
countries$coupEver[countries$coupEver > 1] <-1
countries$anrEver [countries$anrEver  > 1] <-1
countries$warEver [countries$warEver  > 1] <-1
countries$joint <- countries$coupEver + 2*countries$anrEver + 4*countries$warEver
table(countries$joint)

# Histogram of onsets
discrete.histogram(onsets$PMC.Onset,
	xlab="Year",
	bar.width=.8,
	prob.col="gray",
	freq=T
)
